qstart <- 1:100
tstart <- 1:100
plot(qstart, tstart)
title( main = "Identity" )
abline( a = 0, b = 1, lty = 3, col = "#1E90FF" )
cbind(qstart, tstart)[1:7, ]
## qstart tstart
## [1,] 1 1
## [2,] 2 2
## [3,] 3 3
## [4,] 4 4
## [5,] 5 5
## [6,] 6 6
## [7,] 7 7
tstart2 <- tstart
tstart2 <- max(tstart2) - tstart2
plot(qstart, tstart2)
abline( a = 0, b = 1, lty = 3, col = "#1E90FF" )
title( main = "Inverted sequence" )
cbind(qstart, tstart2)[1:7, ]
## qstart tstart2
## [1,] 1 99
## [2,] 2 98
## [3,] 3 97
## [4,] 4 96
## [5,] 5 95
## [6,] 6 94
## [7,] 7 93
tstart2 <- tstart
tstart2[20:60] <- tstart2[60:20]
plot(qstart, tstart2)
abline( a = 0, b = 1, lty = 3, col = "#1E90FF" )
title( main = "Inversion (within sequence)" )
tstart2 <- tstart
tstart2[40:length(tstart2)] <- tstart2[40:length(tstart2)] + 20
plot(qstart, tstart2)
abline( a = 0, b = 1, lty = 3, col = "#1E90FF" )
title( main = "Insertion (within sequence)" )
abline( a = 40, b = 0, lty = 3, col = "#B22222" )
abline( a = 60, b = 0, lty = 3, col = "#B22222" )
#abline( v = seq(1, 100, by = 10), lty = 3, col = "#B22222" )
#abline( h = seq(1, 200, by = 20), lty = 3, col = "#B22222" )
tstart2 <- tstart
tstart2[20:60] <- tstart2[20:60] + 20
plot(qstart, tstart2)
abline( a = 0, b = 1, lty = 3, col = "#1E90FF" )
title( main = "Insertion (within sequence)" )
abline( a = 60, b = 0, lty = 3, col = "#B22222" )
abline( a = 80, b = 0, lty = 3, col = "#B22222" )
tstart2 <- tstart
tstart2[40:length(tstart2)] <- tstart2[40:length(tstart2)] + 20
plot(qstart, tstart2)
abline( a = 0, b = 1, lty = 3, col = "#1E90FF" )
title( main = "Insertion with extra alignment" )
#abline( a = 40, b = 0, lty = 3, col = "#B22222" )
#abline( a = 60, b = 0, lty = 3, col = "#B22222" )
points( x = 80:90, y = 10:20)
tstart2 <- tstart
#tstart2[20:60] <- tstart2[60:20]
set.seed(9)
tstart2[20:80] <- sample(tstart2[20:80])
plot(qstart, tstart2)
abline( a = 0, b = 1, lty = 3, col = "#1E90FF" )
title( main = "Spaghetti plate in the middle" )
https://cran.r-project.org/web/packages/pafr/index.html
library(pafr)
## Loading required package: ggplot2
#my_paf <- read_paf("CBDRx_Purple_Kush.paf")
my_paf <- read_paf("CBDRx_Purple_Kush.paf.gz", include_tags = TRUE)
my_paf
## pafr object with 290411 alignments (792.6Mb)
## 6527 query seqs
## 163 target seqs
## 12 tags: NM, ms, AS, nn, tp, cm, s1, s2, de, zd, rl ...
#
as.data.frame(my_paf)[1:3, 1:23]
## qname qlen qstart qend strand tname tlen tstart
## 1 CM010790.2 79255070 4540967 4626503 + NC_044374.1 88181582 3677124
## 2 CM010790.2 79255070 23140036 23192664 + NC_044374.1 88181582 56189690
## 3 CM010790.2 79255070 71982825 72027138 + NC_044370.1 104987320 102213668
## tend nmatch alen mapq NM ms AS nn tp cm s1 s2 de zd
## 1 3762821 85435 85726 60 291 78457 78457 0 P 8198 83929 4879 0.0023 1
## 2 56242439 52353 52788 60 435 43918 43946 0 P 4220 44699 2663 0.0061 3
## 3 102257983 44297 44323 60 26 43680 43584 0 P 4387 44245 0 0.0005 3
## rl
## 1 15578069
## 2 15578069
## 3 15578069
#as.data.frame(my_paf)[1:3, 1:12]
prim_alignment <- filter_secondary_alignments(my_paf)
nrow(my_paf)
## [1] 290411
nrow(prim_alignment)
## [1] 230879
dotplot(prim_alignment,
#order_by = "provided",
label_seqs = TRUE,
xlab = "Purple Kush",
ylab = "CBDRx")
as.data.frame(prim_alignment)[1:3, 1:12]
## qname qlen qstart qend strand tname tlen tstart
## 1 CM010790.2 79255070 4540967 4626503 + NC_044374.1 88181582 3677124
## 2 CM010790.2 79255070 23140036 23192664 + NC_044374.1 88181582 56189690
## 3 CM010790.2 79255070 71982825 72027138 + NC_044370.1 104987320 102213668
## tend nmatch alen mapq
## 1 3762821 85435 85726 60
## 2 56242439 52353 52788 60
## 3 102257983 44297 44323 60
# table(prim_alignment$tname)
ali <- prim_alignment[grep("NC_044", prim_alignment$tname), ]
nrow(ali)
## [1] 224946
hist(ali$qlen)
ali <- ali[ali$qlen > 2e07, ]
nrow(ali)
## [1] 150680
p <- dotplot(ali,
#order_by = "provided",
label_seqs = TRUE,
xlab = "Purple Kush",
ylab = "CBDRx")
p <- p + theme_bw()
p
table(ali$tname)
##
## NC_044370.1 NC_044371.1 NC_044372.1 NC_044373.1 NC_044374.1 NC_044375.1
## 15791 13964 16752 16198 16238 16109
## NC_044376.1 NC_044377.1 NC_044378.1 NC_044379.1
## 11738 17526 15078 11286
chrom3 <- ali[ali$tname == "NC_044372.1", ]
p <- dotplot(chrom3,
#order_by = "provided",
label_seqs = TRUE,
xlab = "Purple Kush",
ylab = "CBDRx")
p <- p + theme_bw()
p
sort(table(ali$qname), decreasing = TRUE)
##
## CM010790.2 CM010794.2 CM010796.2 CM010792.2 CM010793.2 CM010797.2 CM010795.2
## 19448 18750 17831 16977 16956 14329 14244
## CM010799.2 CM010798.2 CM010791.2
## 13952 11366 6827
#chrom3a <- chrom3[chrom3$qname == "CM010790.2", ]
#chrom3a <- chrom3[chrom3$qname == "CM010794.2", ]
#chrom3a <- chrom3[grep("CM010793.2|CM010794.2|CM010795.2", chrom3$qname), ]
#chrom3a <- chrom3[grep("CM010790.2|CM010792.2|CM010793.2|CM010794.2", chrom3$qname), ]
#chrom3a <- chrom3[grep("CM010790.2|CM010791.2|CM010792.2|CM010793.2|CM010794.2", chrom3$qname), ]
chrom3a <- chrom3[grep("CM010790.2|CM010794.2|CM010792.2", chrom3$qname), ]
#chrom3a <- chrom3[chrom3$qname == "CM010791.2", ]
p <- dotplot(chrom3a,
#order_by = "provided",
label_seqs = TRUE,
xlab = "Purple Kush",
ylab = "CBDRx")
p <- p + theme_bw()
p <- p + theme( axis.text.x = element_text(angle = 60, hjust = 1) )
p
pointdata <- data.frame(
xname = c(0, 20e6, 20e6, 0, 100e6),
ypos = c(0, 0, 20e6, 20e6, 20e6),
ptyname = c(1, 1, 1, 1, 1)
)
p + geom_point(data = pointdata,
mapping = aes(x = xname, y = ypos, shape = factor(ptyname)),
color = "#B22222", size = 4)
# ggsave(filename = "CBDRx_chrom3_PK_dotplot.tiff",
# device = "tiff",
# width = 3.25, height = 3.25, units = "in", dpi = 300,
# compression = "lzw")
#p + geom_segment()
as.data.frame(chrom3a)[1:3, 1:12]
## qname qlen qstart qend strand tname tlen tstart
## 88 CM010790.2 79255070 38631731 38652977 - NC_044372.1 94670641 26983832
## 92 CM010790.2 79255070 12394945 12425856 + NC_044372.1 94670641 27176310
## 113 CM010790.2 79255070 47893129 47913001 + NC_044372.1 94670641 23994558
## tend nmatch alen mapq
## 88 27005139 21197 21316 60
## 92 27208748 30492 32617 60
## 113 24014452 19782 19911 60
chrom3b <- chrom3a[chrom3a$qname == "CM010794.2", ]
p <- dotplot(chrom3b,
#order_by = "provided",
label_seqs = TRUE,
xlab = "Purple Kush",
ylab = "CBDRx")
p <- p + theme_bw()
p <- p + theme( axis.text.x = element_text(angle = 60, hjust = 1) )
p
# p + geom_segment(data = chrom3a,
# aes_string(x = "qstart", xend = "qend", y = "tstart", yend = "tend"),
# size=1.2, colour="#B22222")
line_size <- 1.2
alignment_colour <- "#1E90FF"
p + geom_segment(data = chrom3b[chrom3b$strand == "+", ],
aes(x = qstart, xend = qend, y = tstart, yend = tend),
#size=1.2,
linewidth=line_size,
colour=alignment_colour) +
geom_segment(data = chrom3b[chrom3b$strand=="-",],
aes(x = qend, xend = qstart, y = tstart, yend = tend),
size = line_size,
colour = alignment_colour)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
pointdata <- data.frame(
xname = c(0, 20e6, 20e6, 0),
ypos = c(0, 0, 20e6, 20e6),
ptyname = c(1, 1, 1, 1)
)
p + geom_point(data = pointdata,
mapping = aes(x = xname, y = ypos, shape = factor(ptyname)),
color = "#B22222", size = 4)
p + geom_point(data = pointdata,
mapping = aes(x = xname, y = ypos, shape = factor(ptyname)),
color = "#B22222", size = 4) + theme(legend.position="none")
chrom3b
## pafr object with 9355 alignments (36.1Mb)
## 1 query seqs
## 1 target seqs
## 12 tags: NM, ms, AS, nn, tp, cm, s1, s2, de, zd, rl ...
my_df <- as.data.frame(chrom3b)
my_df[ my_df$qstart > 70e6 & my_df$tstart < 10e6, 1:12]
## qname qlen qstart qend strand tname tlen tstart
## 56427 CM010794.2 78152476 72764325 72769367 + NC_044372.1 94670641 9756412
## 57394 CM010794.2 78152476 75343554 75348186 + NC_044372.1 94670641 3159184
## 64475 CM010794.2 78152476 75342123 75343006 + NC_044372.1 94670641 3157006
## 64605 CM010794.2 78152476 75336176 75338001 + NC_044372.1 94670641 3237442
## 67742 CM010794.2 78152476 75338909 75341018 + NC_044372.1 94670641 3146407
## 68762 CM010794.2 78152476 75334656 75335586 + NC_044372.1 94670641 3235921
## 68849 CM010794.2 78152476 75338488 75339725 + NC_044372.1 94670641 3239680
## 69133 CM010794.2 78152476 75025700 75026457 - NC_044372.1 94670641 2513295
## tend nmatch alen mapq
## 56427 9761456 5001 5044 60
## 57394 3164279 4548 5108 3
## 64475 3157889 867 884 2
## 64605 3239215 1729 1830 60
## 67742 3148530 2019 2131 60
## 68762 3236854 906 933 60
## 68849 3241099 1195 1423 53
## 69133 2514053 742 759 22